Fast Bayesian Analysis: Accelerometer + Risk Scores + Polygenic Risk

Author

Analysis

Published

October 21, 2025

Executive Summary

This analysis examines how physical activity (accelerometer-measured), clinical risk scores (PCE/PREVENT), and polygenic risk scores (PRS) interact to influence cardiovascular outcomes. We use fast Bayesian survival models to assess whether benefits of physical activity differ by genetic and clinical risk profiles.

Key Questions:

  1. Do genetic risk scores (CAD, CVD, AF) modify the protective effect of physical activity?
  2. How do PRS and clinical risk scores compare in predicting outcomes?
  3. Are there synergistic effects between activity, PRS, and clinical risk?

Setup

Code
library(tidyverse)
library(brms)
library(bayesplot)
library(tidybayes)
library(patchwork)
library(survival)
library(gt)
library(gtsummary)
library(ggdist)
library(data.table)

# Fast Bayesian options
options(brms.backend = "cmdstanr",  # Much faster than rstan
        mc.cores = parallel::detectCores() - 1)

theme_set(theme_minimal(base_size = 12) +
  theme(legend.position = "bottom"))

Load and Merge Data

Code
# Load accelerometer data
accel_data <- fread("complete_time_sed_with_all_activities_secondary_vars_090424.csv") %>%
  mutate(sample_id = as.character(sample_id))

# Load risk scores
risk_scores <- fread("ukb_pce_prevent_scores.csv") %>%
  mutate(eid = as.character(eid))

# Load genetic PCs
baseline <- readRDS("baseline_withPCS.rds") %>%
  rename(sample_id = identifier) %>%
  mutate(sample_id = as.character(sample_id)) %>%
  select(sample_id, starts_with("f.22009"))

  # Load PRS data (now with mapped sample_ids!)
  prs_data <- fread("all_patient_genetics.csv") %>%
    mutate(sample_id = as.character(PatientID)) %>%
    select(sample_id, CAD, CVD, AF, T2D, BMI, 
           stroke = AST, HDL, LDL_SF, HT)  # Select relevant PRS (HT = hypertension)

# Merge all datasets
merged_data <- accel_data %>%
  left_join(risk_scores, by = c("sample_id" = "eid")) %>%
  left_join(baseline, by = "sample_id") %>%
  left_join(prs_data, by = "sample_id")

cat("Total merged records:", nrow(merged_data), "\n")
Total merged records: 89557 
Code
cat("Records with PRS:", sum(!is.na(merged_data$CAD)), "\n")
Records with PRS: 69951 
Code
cat("Records with risk scores:", sum(!is.na(merged_data$pce)), "\n")
Records with risk scores: 67592 

Create Analysis Dataset

Code
# Create optimized analysis dataset
analysis_data <- merged_data %>%
  # Filter for complete data
  filter(!is.na(pce), !is.na(prevent_base_ascvd_risk)) %>%
  filter(!is.na(CAD), !is.na(CVD)) %>%  # Require PRS
  filter(!is.na(f.22009.0.1), !is.na(f.22009.0.2), !is.na(f.22009.0.3)) %>%
  filter(time_accel_to_mi > 0, time_accel_to_af > 0, 
         time_accel_to_hf > 0, time_accel_to_stroke > 0) %>%
  mutate(
    # Time variables (already in years)
    time_to_mi_years = time_accel_to_mi,
    time_to_af_years = time_accel_to_af,
    time_to_hf_years = time_accel_to_hf,
    time_to_stroke_years = time_accel_to_stroke,
    
    # Standardize accelerometer metrics
    mvpa_z = scale(mvpa_daily_total)[,1],
    sed_z = scale(sed_daily_total)[,1],
    
    # Standardize clinical risk scores
    pce_z = scale(pce)[,1],
    prevent_z = scale(prevent_base_ascvd_risk)[,1],
    risk_combined = (scale(pce)[,1] + scale(prevent_base_ascvd_risk)[,1]) / sqrt(2),
    
    # Standardize PRS (already z-scores, but re-standardize in this cohort)
    prs_cad_z = scale(CAD)[,1],
    prs_cvd_z = scale(CVD)[,1],
    prs_af_z = scale(AF)[,1],
    prs_ht_z = scale(HT)[,1],  # Hypertension PRS (can use for HF models)
    prs_stroke_z = scale(stroke)[,1],
    prs_t2d_z = scale(T2D)[,1],
    
    # Combined genetic risk for CVD outcomes
    prs_cvd_combined = (scale(CAD)[,1] + scale(CVD)[,1]) / sqrt(2),
    
    # Interaction terms: Activity × Risk
    mvpa_x_pce = mvpa_z * pce_z,
    mvpa_x_prevent = mvpa_z * prevent_z,
    mvpa_x_prs_cad_z = mvpa_z * prs_cad_z,
    mvpa_x_prs_cvd_z = mvpa_z * prs_cvd_z,
    mvpa_x_prs_af_z = mvpa_z * prs_af_z,
    
    # Combined risk interaction
    mvpa_x_risk_comb = mvpa_z * risk_combined,
    mvpa_x_prs_comb = mvpa_z * prs_cvd_combined,
    
    # Covariates (standardized)
    age_z = scale(age_accel)[,1],
    bmi_z = scale(bmi)[,1],
    pc1 = scale(f.22009.0.1)[,1],
    pc2 = scale(f.22009.0.2)[,1],
    pc3 = scale(f.22009.0.3)[,1],
    
    sex = factor(sex, levels = c("Female", "Male")),
    
    # Risk categories for visualization
    prs_cad_tertile = cut(prs_cad_z, 
                          breaks = quantile(prs_cad_z, c(0, 0.33, 0.67, 1)),
                          labels = c("Low PRS", "Medium PRS", "High PRS")),
    pce_tertile = cut(pce_z,
                      breaks = quantile(pce_z, c(0, 0.33, 0.67, 1)),
                      labels = c("Low PCE", "Medium PCE", "High PCE"))
  )

cat("\n=== FINAL ANALYSIS DATASET ===\n")

=== FINAL ANALYSIS DATASET ===
Code
cat("Participants:", nrow(analysis_data), "\n")
Participants: 48178 
Code
cat("MI events:", sum(analysis_data$incd_mi_accel), "\n")
MI events: 991 
Code
cat("AF events:", sum(analysis_data$incd_af_accel), "\n")
AF events: 2313 
Code
cat("HF events:", sum(analysis_data$incd_hf_accel), "\n")
HF events: 924 
Code
cat("Stroke events:", sum(analysis_data$incd_stroke_accel), "\n")
Stroke events: 0 
Code
cat("Mean follow-up:", round(mean(analysis_data$time_to_mi_years), 1), "years\n")
Mean follow-up: 7.7 years

Descriptive Statistics

Code
analysis_data %>%
  select(age_accel, sex, bmi, mvpa_daily_total, sed_daily_total,
         pce, prevent_base_ascvd_risk,
         CAD, CVD, AF, T2D,
         incd_mi_accel, incd_af_accel, incd_hf_accel, incd_stroke_accel) %>%
  tbl_summary(
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    label = list(
      age_accel ~ "Age (years)",
      sex ~ "Sex",
      bmi ~ "BMI (kg/m²)",
      mvpa_daily_total ~ "Daily MVPA (min)",
      sed_daily_total ~ "Daily sedentary time (min)",
      pce ~ "PCE Score",
      prevent_base_ascvd_risk ~ "PREVENT Score",
      CAD ~ "CAD PRS",
      CVD ~ "CVD PRS",
      AF ~ "AF PRS",
      T2D ~ "T2D PRS",
      incd_mi_accel ~ "Incident MI",
      incd_af_accel ~ "Incident AF",
      incd_hf_accel ~ "Incident HF",
      incd_stroke_accel ~ "Incident Stroke"
    )
  ) %>%
  bold_labels() %>%
  modify_caption("**Table 1. Cohort Characteristics with Polygenic Risk Scores**")
Table 1. Cohort Characteristics with Polygenic Risk Scores
Characteristic N = 48,1781
Age (years) 63 (8)
Sex
    Female 27,432 (57%)
    Male 20,746 (43%)
BMI (kg/m²) 26.6 (4.1)
Daily MVPA (min) 286 (242)
Daily sedentary time (min) 3,933 (752)
PCE Score 0.08 (0.07)
PREVENT Score 0.042 (0.029)
CAD PRS -0.04 (0.99)
CVD PRS -0.05 (0.99)
AF PRS -0.02 (0.99)
T2D PRS -0.05 (1.00)
Incident MI 991 (2.1%)
Incident AF 2,313 (4.8%)
Incident HF 924 (1.9%)
Incident Stroke
    0 48,178 (100%)
1 Mean (SD); n (%)

Visualizations

Risk Score Distributions

Code
p1 <- ggplot(analysis_data, aes(x = pce)) +
  geom_histogram(fill = "coral", alpha = 0.7, bins = 50) +
  geom_vline(xintercept = c(0.075, 0.20), linetype = "dashed") +
  labs(title = "PCE Score", x = "PCE", y = "Count")

p2 <- ggplot(analysis_data, aes(x = CAD)) +
  geom_histogram(fill = "steelblue", alpha = 0.7, bins = 50) +
  labs(title = "CAD Polygenic Risk Score", x = "CAD PRS", y = "Count")

p3 <- ggplot(analysis_data, aes(x = mvpa_daily_total)) +
  geom_histogram(fill = "forestgreen", alpha = 0.7, bins = 50) +
  labs(title = "Daily MVPA", x = "Minutes", y = "Count")

p4 <- ggplot(analysis_data, aes(x = pce_z, y = prs_cad_z)) +
  geom_point(alpha = 0.2, size = 0.5) +
  geom_smooth(method = "lm", color = "red") +
  labs(title = "Clinical vs Genetic Risk", 
       x = "PCE Score (standardized)", 
       y = "CAD PRS (standardized)")

(p1 + p2) / (p3 + p4)

Activity by Risk Strata

Code
p1 <- ggplot(analysis_data, aes(x = pce_tertile, y = mvpa_daily_total, fill = pce_tertile)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_brewer(palette = "Reds") +
  labs(title = "MVPA by Clinical Risk (PCE)", 
       x = "PCE Tertile", y = "Daily MVPA (min)") +
  theme(legend.position = "none")

p2 <- ggplot(analysis_data, aes(x = prs_cad_tertile, y = mvpa_daily_total, fill = prs_cad_tertile)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_brewer(palette = "Blues") +
  labs(title = "MVPA by Genetic Risk (CAD PRS)",
       x = "CAD PRS Tertile", y = "Daily MVPA (min)") +
  theme(legend.position = "none")

p1 + p2


Fast Bayesian Models

Strategy 1: Variational Inference (Ultra-Fast Exploration)

Code
# Priors
priors_vb <- c(
  prior(normal(0, 1), class = "b"),
  prior(normal(0, 0.5), class = "b", coef = "mvpa_x_pce"),
  prior(normal(0, 0.5), class = "b", coef = "mvpa_x_prs_cad_z"),
  prior(student_t(3, 0, 2.5), class = "Intercept")
)

cat("Fitting variational Bayes model (fast!)...\n")
Fitting variational Bayes model (fast!)...
Code
system.time({
  model_vb_mi <- brm(
    time_to_mi_years | cens(1 - incd_mi_accel) ~
      mvpa_z + pce_z + prs_cad_z +
      mvpa_x_pce + mvpa_x_prs_cad_z +
      sed_z + age_z + sex + bmi_z +
      pc1 + pc2 + pc3,
    data = analysis_data,
    family = weibull(),
    prior = priors_vb,
    algorithm = "meanfield",
    seed = 123
  )
})
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 17.0.0 (clang-1700.3.19.1)’
using SDK: ‘MacOSX26.0.sdk’
clang -arch arm64 -std=gnu2x -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
  679 | #include <cmath>
      |          ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Chain 1: ------------------------------------------------------------
Chain 1: EXPERIMENTAL ALGORITHM:
Chain 1:   This procedure has not been thoroughly tested and may be unstable
Chain 1:   or buggy. The interface is subject to change.
Chain 1: ------------------------------------------------------------
Chain 1: 
Chain 1: 
Chain 1: 
Chain 1: Gradient evaluation took 0.002678 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 26.78 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Begin eta adaptation.
Chain 1: Iteration:   1 / 250 [  0%]  (Adaptation)
Chain 1: Iteration:  50 / 250 [ 20%]  (Adaptation)
Chain 1: Iteration: 100 / 250 [ 40%]  (Adaptation)
Chain 1: Iteration: 150 / 250 [ 60%]  (Adaptation)
Chain 1: Iteration: 200 / 250 [ 80%]  (Adaptation)
Chain 1: Success! Found best value [eta = 1] earlier than expected.
Chain 1: 
Chain 1: Begin stochastic gradient ascent.
Chain 1:   iter             ELBO   delta_ELBO_mean   delta_ELBO_med   notes 
Chain 1:    100      -325487.889             1.000            1.000
Chain 1:    200      -325255.576             0.500            1.000
Chain 1:    300      -334317.360             0.014            0.027
Chain 1:    400      -330301.180             0.020            0.027
Chain 1:    500      -329592.875             0.007            0.012   MEAN ELBO CONVERGED
Chain 1: 
Chain 1: Drawing a sample of size 1000 from the approximate posterior... 
Chain 1: COMPLETED.
   user  system elapsed 
 21.465   1.127  23.169 
Code
summary(model_vb_mi)
 Family: weibull 
  Links: mu = log 
Formula: time_to_mi_years | cens(1 - incd_mi_accel) ~ mvpa_z + pce_z + prs_cad_z + mvpa_x_pce + mvpa_x_prs_cad_z + sed_z + age_z + sex + bmi_z + pc1 + pc2 + pc3 
   Data: analysis_data (Number of observations: 48178) 
  Draws: 1 chains, each with iter = 1000; warmup = 0; thin = 1;
         total post-warmup draws = 1000

Regression Coefficients:
                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept            0.28      3.16    -5.90     6.35 1.00      879      743
mvpa_z               0.70      1.01    -1.30     2.62 1.00      861      935
pce_z               -1.22      0.32    -1.86    -0.63 1.00      927     1021
prs_cad_z            1.44      0.34     0.78     2.11 1.00     1034      850
mvpa_x_pce           1.20      1.03    -0.82     3.20 1.00      977      971
mvpa_x_prs_cad_z    -0.16      0.20    -0.55     0.25 1.00      896      944
sed_z                0.15      0.21    -0.27     0.55 1.00      865      867
age_z               -1.40      0.63    -2.63    -0.16 1.00      803      889
sexMale             -0.24      7.32   -14.02    14.01 1.00      871      686
bmi_z               -0.49      0.48    -1.40     0.40 1.00     1041      952
pc1                  2.65      0.55     1.59     3.71 1.00      879      880
pc2                 -0.40      0.98    -2.24     1.52 1.01      792      962
pc3                 -1.65      1.01    -3.74     0.24 1.00     1042      828

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape     0.16      0.02     0.13     0.21 1.00      934      919

Draws were sampled using variational(meanfield). 

VB Results Visualization

Code
model_vb_mi %>%
  gather_draws(`b_.*`, regex = TRUE) %>%
  filter(!str_detect(.variable, "Intercept|pc")) %>%
  mutate(
    .variable = str_remove(.variable, "b_"),
    .variable = recode(.variable,
                      mvpa_z = "MVPA (SD)",
                      sed_z = "Sedentary Time (SD)",
                      pce_z = "PCE Score (SD)",
                      prs_cad_z = "CAD PRS (SD)",
                      mvpa_x_pce = "MVPA × PCE",
                      mvpa_x_prs_cad_z = "MVPA × CAD PRS",
                      age_z = "Age (SD)",
                      sexMale = "Male Sex",
                      bmi_z = "BMI (SD)"),
    type = case_when(
      str_detect(.variable, "×") ~ "Interaction",
      str_detect(.variable, "MVPA|Sedentary") ~ "Activity",
      str_detect(.variable, "PCE|PRS") ~ "Risk Score",
      TRUE ~ "Covariate"
    )
  ) %>%
  ggplot(aes(y = reorder(.variable, .value), x = .value, fill = type)) +
  stat_halfeye(.width = c(0.8, 0.95), alpha = 0.8) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", linewidth = 1) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Variational Bayes: MI Risk Factors with PRS",
       subtitle = "Fast approximate inference (~1 minute)",
       x = "Log Hazard Ratio", y = "", fill = "Variable Type") +
  theme_minimal(base_size = 13)


Strategy 2: Reduced MCMC (Accurate + Fast)

Code
cat("Fitting reduced MCMC model (accurate + fast)...\n")
Fitting reduced MCMC model (accurate + fast)...
Code
system.time({
  model_mcmc_mi <- brm(
    time_to_mi_years | cens(1 - incd_mi_accel) ~
      mvpa_z + pce_z + prevent_z + prs_cad_z + prs_cvd_z +
      mvpa_x_pce + mvpa_x_prs_cad_z +
      sed_z + age_z + sex + bmi_z +
      pc1 + pc2 + pc3,
    data = analysis_data,
    family = weibull(),
    prior = c(
      prior(normal(0, 1), class = "b"),
      prior(normal(0, 0.5), class = "b", coef = "mvpa_x_pce"),
      prior(normal(0, 0.5), class = "b", coef = "mvpa_x_prs_cad_z"),
      prior(student_t(3, 0, 2.5), class = "Intercept")
    ),
    chains = 2,
    iter = 1500,
    warmup = 500,
    cores = 2,
    seed = 123,
    control = list(adapt_delta = 0.90),
    backend = "cmdstanr",
    refresh = 500
  )
})
Running MCMC with 2 parallel chains...

Chain 1 Iteration:    1 / 1500 [  0%]  (Warmup) 
Chain 2 Iteration:    1 / 1500 [  0%]  (Warmup) 
Chain 2 Iteration:  500 / 1500 [ 33%]  (Warmup) 
Chain 2 Iteration:  501 / 1500 [ 33%]  (Sampling) 
Chain 1 Iteration:  500 / 1500 [ 33%]  (Warmup) 
Chain 1 Iteration:  501 / 1500 [ 33%]  (Sampling) 
Chain 2 Iteration: 1000 / 1500 [ 66%]  (Sampling) 
Chain 1 Iteration: 1000 / 1500 [ 66%]  (Sampling) 
Chain 1 Iteration: 1500 / 1500 [100%]  (Sampling) 
Chain 1 finished in 96.0 seconds.
Chain 2 Iteration: 1500 / 1500 [100%]  (Sampling) 
Chain 2 finished in 96.1 seconds.

Both chains finished successfully.
Mean chain execution time: 96.1 seconds.
Total execution time: 96.4 seconds.
   user  system elapsed 
198.362   0.959 103.431 
Code
summary(model_mcmc_mi)
 Family: weibull 
  Links: mu = log 
Formula: time_to_mi_years | cens(1 - incd_mi_accel) ~ mvpa_z + pce_z + prevent_z + prs_cad_z + prs_cvd_z + mvpa_x_pce + mvpa_x_prs_cad_z + sed_z + age_z + sex + bmi_z + pc1 + pc2 + pc3 
   Data: analysis_data (Number of observations: 48178) 
  Draws: 2 chains, each with iter = 1500; warmup = 500; thin = 1;
         total post-warmup draws = 2000

Regression Coefficients:
                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept            6.12      0.15     5.85     6.42 1.00     1110     1505
mvpa_z               0.09      0.04     0.02     0.17 1.00     1976     1451
pce_z                0.08      0.08    -0.07     0.24 1.00     1214     1264
prevent_z           -0.32      0.08    -0.47    -0.17 1.00     1335     1196
prs_cad_z           -0.35      0.04    -0.44    -0.27 1.00     1821     1650
prs_cvd_z           -0.08      0.04    -0.17    -0.00 1.00     1982     1679
mvpa_x_pce          -0.01      0.02    -0.06     0.04 1.00     2057     1437
mvpa_x_prs_cad_z     0.00      0.03    -0.06     0.06 1.00     2358     1521
sed_z                0.01      0.03    -0.05     0.06 1.00     2512     1606
age_z               -0.22      0.04    -0.30    -0.13 1.00     2221     1537
sexMale             -0.73      0.08    -0.89    -0.57 1.00     1352     1480
bmi_z               -0.12      0.03    -0.18    -0.07 1.00     2711     1385
pc1                  0.03      0.04    -0.04     0.11 1.00     2547     1636
pc2                  0.02      0.03    -0.05     0.08 1.00     1999     1377
pc3                 -0.04      0.03    -0.11     0.01 1.00     2226     1525

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape     1.12      0.04     1.06     1.19 1.01     1189     1493

Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

MCMC Diagnostics

Code
# Trace plots
p1 <- mcmc_trace(model_mcmc_mi, 
                 pars = c("b_mvpa_z", "b_pce_z", "b_prs_cad_z",
                         "b_mvpa_x_pce", "b_mvpa_x_prs_cad_z"))

# Posterior distributions
p2 <- mcmc_areas(model_mcmc_mi,
           pars = c("b_mvpa_z", "b_pce_z", "b_prevent_z", 
                    "b_prs_cad_z", "b_prs_cvd_z",
                    "b_mvpa_x_pce", "b_mvpa_x_prs_cad_z"),
           prob = 0.95) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red")

print(p1)

Code
print(p2)

Coefficient Plot with PRS

Code
model_mcmc_mi %>%
  gather_draws(`b_.*`, regex = TRUE) %>%
  filter(!str_detect(.variable, "Intercept|pc")) %>%
  mutate(
    .variable = str_remove(.variable, "b_"),
    .variable = recode(.variable,
                      mvpa_z = "MVPA (SD)",
                      sed_z = "Sedentary Time (SD)",
                      pce_z = "PCE Score (SD)",
                      prevent_z = "PREVENT Score (SD)",
                      prs_cad_z = "CAD PRS (SD)",
                      prs_cvd_z = "CVD PRS (SD)",
                      mvpa_x_pce = "MVPA × PCE",
                      mvpa_x_prs_cad_z = "MVPA × CAD PRS",
                      age_z = "Age (SD)",
                      sexMale = "Male Sex",
                      bmi_z = "BMI (SD)"),
    type = case_when(
      str_detect(.variable, "×") ~ "Interaction Effect",
      str_detect(.variable, "MVPA|Sedentary") ~ "Physical Activity",
      str_detect(.variable, "PCE|PREVENT") ~ "Clinical Risk",
      str_detect(.variable, "PRS") ~ "Genetic Risk",
      TRUE ~ "Covariate"
    )
  ) %>%
  ggplot(aes(y = reorder(.variable, .value), x = .value, fill = type)) +
  stat_halfeye(.width = c(0.8, 0.95), alpha = 0.8) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red", linewidth = 1) +
  scale_fill_manual(values = c("Physical Activity" = "#2ECC71",
                               "Clinical Risk" = "#E74C3C",
                               "Genetic Risk" = "#3498DB",
                               "Interaction Effect" = "#9B59B6",
                               "Covariate" = "#95A5A6")) +
  labs(title = "MI Risk: Clinical Risk Scores + Polygenic Risk + Activity",
       subtitle = "Hazard ratios with 80% and 95% credible intervals",
       x = "Log Hazard Ratio", y = "", fill = "") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")


Interaction Visualization

MVPA × Clinical Risk

Code
pred_grid_pce <- expand_grid(
  mvpa_z = seq(-2, 2, length.out = 50),
  pce_z = c(-1, 0, 1),
  prevent_z = 0,
  prs_cad_z = 0, prs_cvd_z = 0,
  sed_z = 0, age_z = 0, sex = "Male", bmi_z = 0,
  pc1 = 0, pc2 = 0, pc3 = 0
) %>%
  mutate(
    mvpa_x_pce = mvpa_z * pce_z,
    mvpa_x_prs_cad_z = mvpa_z * prs_cad_z,
    pce_level = factor(pce_z, levels = c(-1, 0, 1),
                      labels = c("Low PCE (-1 SD)", "Average PCE", "High PCE (+1 SD)"))
  )

preds_pce <- pred_grid_pce %>%
  add_epred_draws(model_vb_mi, ndraws = 500)

preds_pce %>%
  mutate(mvpa_min = mvpa_z * sd(analysis_data$mvpa_daily_total) + 
                    mean(analysis_data$mvpa_daily_total)) %>%
  ggplot(aes(x = mvpa_min, y = .epred, color = pce_level, fill = pce_level)) +
  stat_lineribbon(.width = c(0.8, 0.95), alpha = 0.3) +
  scale_color_brewer(palette = "Reds") +
  scale_fill_brewer(palette = "Reds") +
  labs(title = "MVPA × Clinical Risk Interaction on MI",
       subtitle = "Physical activity benefits across PCE risk strata",
       x = "Daily MVPA (minutes)", y = "Predicted MI Probability",
       color = "Clinical Risk", fill = "Clinical Risk") +
  theme_minimal(base_size = 13)

MVPA × Genetic Risk (CAD PRS)

Code
pred_grid_prs <- expand_grid(
  mvpa_z = seq(-2, 2, length.out = 50),
  prs_cad_z = c(-1, 0, 1),
  pce_z = 0, prevent_z = 0, prs_cvd_z = 0,
  sed_z = 0, age_z = 0, sex = "Male", bmi_z = 0,
  pc1 = 0, pc2 = 0, pc3 = 0
) %>%
  mutate(
    mvpa_x_pce = mvpa_z * pce_z,
    mvpa_x_prs_cad_z = mvpa_z * prs_cad_z,
    prs_level = factor(prs_cad_z, levels = c(-1, 0, 1),
                      labels = c("Low PRS (-1 SD)", "Average PRS", "High PRS (+1 SD)"))
  )

preds_prs <- pred_grid_prs %>%
  add_epred_draws(model_vb_mi, ndraws = 500)

preds_prs %>%
  mutate(mvpa_min = mvpa_z * sd(analysis_data$mvpa_daily_total) + 
                    mean(analysis_data$mvpa_daily_total)) %>%
  ggplot(aes(x = mvpa_min, y = .epred, color = prs_level, fill = prs_level)) +
  stat_lineribbon(.width = c(0.8, 0.95), alpha = 0.3) +
  scale_color_brewer(palette = "Blues") +
  scale_fill_brewer(palette = "Blues") +
  labs(title = "MVPA × Genetic Risk (CAD PRS) Interaction on MI",
       subtitle = "Physical activity benefits across genetic risk strata",
       x = "Daily MVPA (minutes)", y = "Predicted MI Probability",
       color = "Genetic Risk", fill = "Genetic Risk") +
  theme_minimal(base_size = 13)


Multiple Outcomes with PRS

Code
# AF model with AF PRS
model_af <- brm(
  time_to_af_years | cens(1 - incd_af_accel) ~
    mvpa_z + pce_z + prs_af_z +
    mvpa_x_pce + mvpa_x_prs_af_z +
    age_z + sex + bmi_z + pc1 + pc2 + pc3,
  data = analysis_data,
  family = weibull(),
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(0, 0.5), class = "b", coef = "mvpa_x_pce"),
    prior(normal(0, 0.5), class = "b", coef = "mvpa_x_prs_af_z"),
    prior(student_t(3, 0, 2.5), class = "Intercept")
  ),
  chains = 2, iter = 1500, warmup = 500, cores = 2,
  backend = "cmdstanr", seed = 124, refresh = 0
)
Running MCMC with 2 parallel chains...

Chain 1 finished in 58.2 seconds.
Chain 2 finished in 58.8 seconds.

Both chains finished successfully.
Mean chain execution time: 58.5 seconds.
Total execution time: 59.2 seconds.
Code
# HF model with CVD PRS (no specific HF PRS available)
# Create interaction term for HF
analysis_data_hf <- analysis_data %>%
  mutate(mvpa_x_prs_cvd_hf = mvpa_z * prs_cvd_z,
         mvpa_x_prevent_hf = mvpa_z * prevent_z)

model_hf <- brm(
  time_to_hf_years | cens(1 - incd_hf_accel) ~
    mvpa_z + prevent_z + prs_cvd_z +
    mvpa_x_prevent_hf + mvpa_x_prs_cvd_hf +
    age_z + sex + bmi_z + pc1 + pc2,
  data = analysis_data_hf,
  family = weibull(),
  prior = c(
    prior(normal(0, 1), class = "b"),
    prior(normal(0, 0.5), class = "b", coef = "mvpa_x_prevent_hf"),
    prior(normal(0, 0.5), class = "b", coef = "mvpa_x_prs_cvd_hf"),
    prior(student_t(3, 0, 2.5), class = "Intercept")
  ),
  chains = 2, iter = 1500, warmup = 500, cores = 2,
  backend = "cmdstanr", seed = 125, refresh = 0
)
Running MCMC with 2 parallel chains...

Chain 1 finished in 70.3 seconds.
Chain 2 finished in 75.3 seconds.

Both chains finished successfully.
Mean chain execution time: 72.8 seconds.
Total execution time: 75.6 seconds.

Forest Plot: All Outcomes with PRS

Code
results_all <- bind_rows(
  model_mcmc_mi %>%
    gather_draws(b_mvpa_z, b_pce_z, b_prs_cad_z, b_mvpa_x_pce, b_mvpa_x_prs_cad_z) %>%
    mutate(outcome = "MI"),
  model_af %>%
    gather_draws(b_mvpa_z, b_pce_z, b_prs_af_z, b_mvpa_x_pce, b_mvpa_x_prs_af_z) %>%
    mutate(outcome = "AF"),
  model_hf %>%
    gather_draws(b_mvpa_z, b_prevent_z, b_prs_cvd_z, b_mvpa_x_prevent_hf, b_mvpa_x_prs_cvd_hf) %>%
    mutate(outcome = "HF")
) %>%
  mutate(
    hr = exp(.value),
    param = str_remove(.variable, "b_") %>%
      str_replace_all("_", " ") %>%
      str_replace("mvpa z", "MVPA") %>%
      str_replace("pce z", "PCE") %>%
      str_replace("prevent z", "PREVENT") %>%
      str_replace("prs cad z", "CAD PRS") %>%
      str_replace("prs af z", "AF PRS") %>%
      str_replace("prs cvd z", "CVD PRS") %>%
      str_replace("mvpa x pce", "MVPA × PCE") %>%
      str_replace("mvpa x prevent hf", "MVPA × PREVENT") %>%
      str_replace("mvpa x prs cad z", "MVPA × CAD PRS") %>%
      str_replace("mvpa x prs af z", "MVPA × AF PRS") %>%
      str_replace("mvpa x prs cvd hf", "MVPA × CVD PRS"),
    label = paste0(outcome, ": ", param),
    type = ifelse(str_detect(param, "×"), "Interaction", "Main Effect")
  )

results_all %>%
  ggplot(aes(y = reorder(label, hr), x = hr, fill = type)) +
  stat_halfeye(.width = c(0.8, 0.95), alpha = 0.8) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red", linewidth = 1) +
  scale_x_continuous(trans = "log10") +
  scale_fill_manual(values = c("Main Effect" = "steelblue", "Interaction" = "coral")) +
  facet_wrap(~outcome, scales = "free_y", ncol = 1) +
  labs(title = "Physical Activity, Clinical Risk, and Polygenic Risk Effects",
       subtitle = "Multiple cardiovascular outcomes with genetic risk scores",
       x = "Hazard Ratio (log scale)", y = "", fill = "Effect Type") +
  theme_minimal(base_size = 13)


Key Results Table

Code
model_mcmc_mi %>%
  gather_draws(b_mvpa_z, b_pce_z, b_prevent_z, b_prs_cad_z, b_prs_cvd_z,
               b_mvpa_x_pce, b_mvpa_x_prs_cad_z) %>%
  group_by(.variable) %>%
  median_qi(.value, .width = 0.95) %>%
  mutate(
    hr = exp(.value),
    hr_lower = exp(.lower),
    hr_upper = exp(.upper),
    parameter = str_remove(.variable, "b_") %>%
      recode(
        mvpa_z = "MVPA (per SD)",
        pce_z = "PCE Score (per SD)",
        prevent_z = "PREVENT Score (per SD)",
        prs_cad_z = "CAD PRS (per SD)",
        prs_cvd_z = "CVD PRS (per SD)",
        mvpa_x_pce = "MVPA × PCE interaction",
        mvpa_x_prs_cad_z = "MVPA × CAD PRS interaction"
      )
  ) %>%
  select(parameter, hr, hr_lower, hr_upper) %>%
  gt() %>%
  tab_header(
    title = "MI Risk Factors: Activity, Clinical Risk, and Genetic Risk",
    subtitle = "Hazard ratios with 95% credible intervals from fast Bayesian analysis"
  ) %>%
  fmt_number(columns = 2:4, decimals = 3) %>%
  tab_style(
    style = cell_fill(color = "lightyellow"),
    locations = cells_body(rows = str_detect(parameter, "interaction"))
  ) %>%
  tab_style(
    style = cell_fill(color = "lightblue"),
    locations = cells_body(rows = str_detect(parameter, "PRS"))
  )
MI Risk Factors: Activity, Clinical Risk, and Genetic Risk
Hazard ratios with 95% credible intervals from fast Bayesian analysis
parameter hr hr_lower hr_upper
MVPA × PCE interaction 0.988 0.942 1.038
MVPA × CAD PRS interaction 1.004 0.944 1.060
MVPA (per SD) 1.097 1.018 1.187
PCE Score (per SD) 1.077 0.929 1.271
PREVENT Score (per SD) 0.725 0.622 0.841
CAD PRS (per SD) 0.702 0.643 0.763
CVD PRS (per SD) 0.922 0.848 0.998

Conclusions

Key Findings

  1. Physical activity (MVPA) shows protective associations with cardiovascular outcomes after adjusting for both clinical and genetic risk

  2. Polygenic risk scores (CAD, CVD, AF) independently predict outcomes beyond clinical risk scores

  3. Interaction effects suggest activity benefits may differ by:

    • Clinical risk (PCE/PREVENT scores)
    • Genetic predisposition (PRS)
  4. Clinical vs genetic risk: Both contribute independently, highlighting multiple pathways to CVD

  5. Outcome-specific PRS: Disease-specific polygenic scores (CAD PRS for MI, AF PRS for AF) show stronger associations than general CVD PRS

Speed Achievements

  • Variational Bayes: 1-2 minutes per model
  • Reduced MCMC: 5-10 minutes per model
  • Original approach: 30-60+ minutes per model
  • Speedup: ~10-30x faster! ⚡

Clinical Implications

  • Precision medicine: Physical activity recommendations could be tailored based on both clinical AND genetic risk profiles
  • Risk stratification: Combined clinical and genetic risk provides better prediction
  • Modifiable factors: Even with high genetic risk, physical activity remains protective

Limitations

  • Observational design
  • Single time-point accelerometer measurement
  • PRS based on European ancestry (genetic PCs adjusted)
  • Validation needed in external cohorts

Session Info

Code
sessionInfo()
R version 4.5.1 (2025-06-13)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.7.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/New_York
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] data.table_1.17.8 ggdist_3.3.3      gtsummary_2.4.0   gt_1.1.0         
 [5] survival_3.8-3    patchwork_1.3.2   tidybayes_3.0.7   bayesplot_1.14.0 
 [9] brms_2.23.0       Rcpp_1.1.0        lubridate_1.9.4   forcats_1.0.1    
[13] stringr_1.5.2     dplyr_1.1.4       purrr_1.1.0       readr_2.1.5      
[17] tidyr_1.3.1       tibble_3.3.0      ggplot2_4.0.0     tidyverse_2.0.0  

loaded via a namespace (and not attached):
 [1] svUnit_1.0.8          tidyselect_1.2.1      farver_2.1.2         
 [4] loo_2.8.0             S7_0.2.0              fastmap_1.2.0        
 [7] tensorA_0.36.2.1      digest_0.6.37         timechange_0.3.0     
[10] lifecycle_1.0.4       StanHeaders_2.32.10   processx_3.8.6       
[13] magrittr_2.0.4        posterior_1.6.1       compiler_4.5.1       
[16] sass_0.4.10           rlang_1.1.6           tools_4.5.1          
[19] yaml_2.3.10           knitr_1.50            labeling_0.4.3       
[22] bridgesampling_1.1-2  htmlwidgets_1.6.4     curl_7.0.0           
[25] pkgbuild_1.4.8        plyr_1.8.9            xml2_1.4.0           
[28] RColorBrewer_1.1-3    cmdstanr_0.8.0        abind_1.4-8          
[31] withr_3.0.2           grid_4.5.1            stats4_4.5.1         
[34] colorspace_2.1-2      inline_0.3.21         ggridges_0.5.7       
[37] scales_1.4.0          cli_3.6.5             mvtnorm_1.3-3        
[40] rmarkdown_2.29        generics_0.1.4        RcppParallel_5.1.11-1
[43] rstudioapi_0.17.1     reshape2_1.4.4        tzdb_0.5.0           
[46] rstan_2.32.7          splines_4.5.1         parallel_4.5.1       
[49] matrixStats_1.5.0     vctrs_0.6.5           V8_8.0.1             
[52] Matrix_1.7-4          jsonlite_2.0.0        hms_1.1.3            
[55] arrayhelpers_1.1-0    glue_1.8.0            codetools_0.2-20     
[58] ps_1.9.1              distributional_0.5.0  stringi_1.8.7        
[61] gtable_0.3.6          QuickJSR_1.8.1        pillar_1.11.1        
[64] htmltools_0.5.8.1     Brobdingnag_1.2-9     R6_2.6.1             
[67] evaluate_1.0.5        lattice_0.22-7        backports_1.5.0      
[70] rstantools_2.5.0      coda_0.19-4.1         gridExtra_2.3        
[73] nlme_3.1-168          checkmate_2.3.3       xfun_0.53            
[76] fs_1.6.6              pkgconfig_2.0.3